import requests
import lxml.etree as le
import ktool
import json
import pandas as pd
import urllib.parse as up

# Tutorial page whose left-hand navigation column lists the chapters.
PAGE_URL = "https://www.runoob.com/html/html-tutorial.html"
# Every link inside the navigation column. The name and the href are read
# from the same <a> element, so the two columns can never get out of step
# (separate text()/@href queries may return lists of different lengths).
LINK_XPATH = '//div[@id="leftcolumn"]//a[@href]'

# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url=PAGE_URL, timeout=10)
# Fail loudly on 4xx/5xx instead of scraping an error page.
response.raise_for_status()

# response.content is the raw response body (bytes); lxml parses it into a tree.
page = le.HTML(response.content)

rows = []
for anchor in page.xpath(LINK_XPATH):
    # itertext() also collects text nested in child tags of the link.
    category_name = ''.join(anchor.itertext()).strip()
    # urljoin resolves relative, root-relative and absolute hrefs correctly,
    # unlike plain concatenation with the site root.
    results_url = up.urljoin(PAGE_URL, anchor.get('href').strip())
    rows.append((category_name, results_url))

# Columns are given as a list: a set has no defined order.
df = pd.DataFrame(rows, columns=['category_name', 'results_url'])
print(df)

# NOTE(review): writing the legacy .xls format depends on an Excel writer
# engine that recent pandas releases may no longer provide - confirm, and
# consider switching to .xlsx if the export fails.
df.to_excel('./菜鸟教程.xls')

